library(peprDnaStability)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(knitr)
gel_data_dir <- "stability_example"
gel_metadata <- "stability_example/MG001_stability_metadata.csv"
## number of gel lanes ran
number_of_lanes <- 12
gel_design <- read.csv(gel_metadata) %>%
mutate(gel = sub(pattern = ".*_Gel_0",replacement = "Gel-", x = gel),
gel = sub(pattern = ".jpg",replacement = "", x = gel))
ggplot(gel_design) + geom_raster(aes(x = gel, y = lane, fill = condition), alpha = 0.5) +
geom_text(aes(x = gel, y = lane, label = vial)) + facet_wrap(~gel,nrow = 1, scales = "free_x") + theme_bw() + theme(axis.text.x = element_blank(), axis.title.x = element_blank(), axis.ticks.x = element_blank(), legend.position = "bottom", legend.direction = "horizontal") + labs(y = "Lane", fill = "Conditions")
Images were croped using preview. Below are the quality control plots for processing the raw image files.
Rdat_files <- list.files(path = gel_data_dir,pattern = "Rdata",full.names = TRUE)
lane_labels <- paste0("L",c(paste0("0",1:9),1:12))[1:number_of_lanes]
for(gel_dat in Rdat_files){
dat_list <- process_gel(gel_dat)
print(gel_dat)
annotated_gel(image_mat = dat_list['image_dat'],
lane_edges_df = dat_list[5][[1]]$lane_edges_df ,
peaks_df = dat_list['marker_dat'],
lane_labels = lane_labels)
}
## Joining by: c("bins", "y")
## [1] "stability_example/MG001-37C-S-1.Rdata"
## Joining by: c("bins", "y")
## [1] "stability_example/MG001-37C-S-2.Rdata"
## Joining by: c("bins", "y")
## [1] "stability_example/MG001-37C-S-3.Rdata"
## Joining by: c("bins", "y")
## [1] "stability_example/MG001-4C-S-1.Rdata"
## Joining by: c("bins", "y")
## [1] "stability_example/MG001-4C-S-2.Rdata"
batch_data <- batch_process_gels(gel_data_dir)
## Joining by: c("bins", "y")
## Joining by: c("bins", "y")
## Joining by: c("bins", "y")
## Joining by: c("bins", "y")
## Joining by: c("bins", "y")
## combine data frames
intensity_df <- dplyr::data_frame()
markers_df <- dplyr::data_frame()
binned_df <- dplyr::data_frame()
for(x in batch_data){
gel_name <- x$gel
intensity_df <- x$intensity_dat %>%
dplyr::mutate(gel = gel_name) %>%
dplyr::bind_rows(intensity_df)
markers_df <- x$marker_dat %>%
dplyr::mutate(gel = gel_name) %>%
dplyr::bind_rows(markers_df)
binned_df <- x$bin_dat %>%
dplyr::mutate(gel = gel_name) %>%
dplyr::bind_rows(binned_df)
}
## adding metadata
meta <- read.csv(gel_metadata,stringsAsFactors = F) %>% filter(condition != "Blank") %>%
mutate(lane_labels = lane, lane = gsub("L0","L",lane))
intensity_df$lane <- as.character(intensity_df$lane)
intensity <- left_join(intensity_df, meta) %>% filter(condition != "Blank")
## Joining by: c("lane", "gel")
markers <- left_join(markers_df, meta) %>% filter(condition != "Blank")
## Joining by: c("lane", "gel")
binned_df$lane <- as.character(binned_df$lane)
binned <- left_join(binned_df, meta) %>% filter(condition != "Blank")
## Joining by: c("lane", "gel")
highlow_bin <- binned %>%
filter(!is.na(vial), bins %in% c("23.1-9.42", "48.5-23.1",
"97-48.5","194-97")) %>%
mutate(bins = ifelse(bins %in% c("23.1-9.42", "48.5-23.1"),
"low","high")) %>%
group_by(lane, bins, gel, vial, condition) %>%
summarise(bin_intensity = sum(bin_intensity))
highlow_ratio <- highlow_bin %>%
spread(bins, bin_intensity) %>%
mutate(bin_ratio = high/(low + high))
highlow_ratio_control<- highlow_ratio %>% filter(condition == "Control")
control_ratio <- highlow_ratio_control %>%
group_by(gel) %>%
summarise(control_ratio = mean(bin_ratio))
highlow_diff <- highlow_ratio %>%
left_join(control_ratio) %>%
mutate(ratio_diff = bin_ratio - control_ratio) %>%
filter(condition != "Control" & condition != "Ladder") %>%
mutate(gel = sub(pattern = ".*Gel_0",replacement = "", x = gel),
gel = sub(".jpg","", gel))
## Joining by: "gel"
ggplot(highlow_diff) + geom_point(aes(x = condition, y = ratio_diff, color = gel)) +
labs(x = "Treatment", y = "Ratio Difference from Control") +
theme_bw()
make_one_sided_ttest_df <- function (dat, value, gelAsFactor = TRUE) {
tt_df <- data_frame()
## Pairwise comparison to control for each bin and each condition
for(bin in unique(dat$bins)){
df <- dat %>% filter(bins == bin)
y <- df %>% filter(condition == "Control") %>% .[[value]]
for(treatment in unique(df$condition[df$condition != "Control"])){
x <-df %>% filter(condition == treatment) %>% .[[value]]
tt <- t.test(x,y,alternative = "less")
tt_df <- data_frame(size_bins = bin,
condition = treatment,
tstatistic = tt$statistic,
pvalue = tt$p.value) %>%
bind_rows(tt_df)
}
}
tt_df %>% mutate(sig = ifelse(pvalue < 0.05/n(),TRUE,FALSE))
}
tt_df <- highlow_ratio %>% mutate(bins = "H") %>%
filter(condition != "Ladder") %>%
make_one_sided_ttest_df(value = "bin_ratio", gelAsFactor = FALSE)
tt_table <- tt_df %>% mutate(pvalue = round(pvalue, 4) %>% as.character(),
pvalue = ifelse(sig == TRUE, paste0("__",pvalue,"__"),pvalue),
pvalue = ifelse(pvalue == "__0__", "__<0.0000__", pvalue),
tstatistic = round(tstatistic, 2)) %>%
select(-sig) %>%
spread(size_bins, pvalue)
colnames(tt_table) <- c("Treatment", "T-Statistic", "p-value")
kable(tt_table, row.names = FALSE, caption = "P-value of one sided t-tests comparing proprotion of DNA in high and low molecular weight bins between treatment and control. Significant p-values (alpha = 0.0125) are indicated in bold.")
| Treatment | T-Statistic | p-value |
|---|---|---|
| 2w37c | -0.99 | 0.1668 |
| 2w4c | 0.58 | 0.7162 |
| 8w37c | -6.87 | <0.0000 |
| 8w4c | 0.57 | 0.7112 |
sessionInfo()
## R version 3.2.1 (2015-06-18)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.9.5 (Mavericks)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] knitr_1.10.5 ggplot2_1.0.1
## [3] tidyr_0.2.0 dplyr_0.4.2
## [5] peprDnaStability_0.0.0.9000
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.0 magrittr_1.5 MASS_7.3-40 munsell_0.4.2
## [5] colorspace_1.2-6 R6_2.1.0 jpeg_0.1-8 highr_0.5
## [9] stringr_1.0.0 plyr_1.8.3 tools_3.2.1 parallel_3.2.1
## [13] grid_3.2.1 gtable_0.1.2 DBI_0.3.1 htmltools_0.2.6
## [17] lazyeval_0.1.10 yaml_2.1.13 assertthat_0.1 digest_0.6.8
## [21] reshape2_1.4.1 formatR_1.2 evaluate_0.7 rmarkdown_0.7
## [25] labeling_0.3 stringi_0.5-5 scales_0.2.5 proto_0.3-10